/* Session setup: compress data sets created by this program. */
options compress=YES;
/* Library reference used for every input and output data set below.
   NOTE(review): the path is an empty string here - confirm the intended
   directory before running. */
libname manasi ''; 

* ELIMINATE DUPLICATES BY HUN AND PAN AND SORT ;
/* Sorts the parent/kid file by parent SSN (par_ssn) and hun and keeps one
   record per par_ssn/hun pair.
   NODUPKEY was missing, so the step only sorted and never removed the
   duplicates its header promises. The later steps number the records within
   each parent (kid_id) and keep at most 7 of them, so a repeated par_ssn/hun
   pair would be counted as an extra kid.
   Removed records are written to WORK.WKIDAPPS_DUPS so they can be reviewed. */
proc sort data=manasi.parents_comb_wkidapps
		out=manasi.parents_comb_wkidapps
		dupout=work.wkidapps_dups
		nodupkey;
	by par_ssn hun;
run;

* GET RID OF UNKNOWN PARENT SSNs ;
/* Copies the sorted parent/kid file, dropping records whose parent SSN is
   missing or is the all-zero placeholder.
   MISSING() works for character and numeric variables alike. The previous
   test compared par_ssn with both a numeric missing value and quoted strings,
   which forces an implicit type conversion on every record and fills the log
   with conversion notes.
   NOTE(review): with a character par_ssn, values that are not numeric strings
   were previously dropped as a side effect of that conversion and are now
   kept - confirm no such values are expected. */
data manasi.parents_comb_wkidapps_wide;
	set manasi.parents_comb_wkidapps;
	if missing(par_ssn) or par_ssn='000000000' then delete;
run;

* CREATE ID VARIABLE FOR KIDS;
/* Numbers the records within each parent 1, 2, 3, ... in their current
   order. The input must already be grouped by par_ssn. */
data manasi.parents_comb_wkidapps_wide;
	set manasi.parents_comb_wkidapps_wide;
	by par_ssn;

	/* Restart the running counter at the first record of every parent. */
	if first.par_ssn then kid_id = 0;
	kid_id + 1;
run;

* SEE HOW MANY SSI KIDS EACH PARENT HAS AND MERGE INTO PARENT DATASET;
/* One output record per parent holding the largest kid_id, i.e. the number
   of kid records for that parent. Nothing is printed. */
proc means noprint data=manasi.parents_comb_wkidapps_wide;
	var kid_id;
	by par_ssn;
	output 	out=manasi.parents_comb_wkidapps_kidid
			max=max_kidid;
run;

/* Print the distribution of the number of kids per parent. */
proc means data=manasi.parents_comb_wkidapps_kidid
		mean median min max p75 p90 p95 p99;
	var max_kidid;
run;

/* Attach each parent's kid count (max_kidid) to every one of that parent's
   records. Both inputs are in par_ssn order. */
data manasi.parents_comb_wkidapps_wide;
	merge 	manasi.parents_comb_wkidapps_wide
			manasi.parents_comb_wkidapps_kidid (keep = par_ssn max_kidid);
	by par_ssn;
run;

/* Keep at most the first seven kids per parent. The collapse step that
   follows stores kids in arrays with seven slots. */
data manasi.parents_comb_wkidapps_wide;
	set manasi.parents_comb_wkidapps_wide;
	if kid_id > 7 then delete;
run;

* COLLAPSE TO PAN LEVEL TO GET ONE RECORD FOR EACH PARENT;
/* Reshapes the long parent/kid file (one record per parent and kid, with
   kid_id in 1-7) into one record per parent.

   Output variables:
     par_ssn                  parent SSN (BY key)
     kidhun1-kidhun7          hun of kid 1..7
     appdateK_1-appdateK_7    value of appdateK (K = 1..7) for kid 1..7

   Fix: the KEEP and RETAIN lists previously named appdate2_2-appdate2_7, so
   appdate2_1 was neither carried across records nor written out and the
   first kid's appdate2 was lost. Both lists now start at appdate2_1.

   Requires input sorted by par_ssn and kid_id within 1..7 (larger values
   would run past the array bounds). */
data manasi.parents_comb_wkidapps_wide;
	set manasi.parents_comb_wkidapps_wide;
	by par_ssn;

	/* Wide target slots, indexed by kid_id. */
	array 	akidhun(1:7) $ 9	kidhun1-kidhun7 ;
	array 	aappdate1(1:7)		appdate1_1-appdate1_7;
	array 	aappdate2(1:7)		appdate2_1-appdate2_7;
	array 	aappdate3(1:7)		appdate3_1-appdate3_7;
	array 	aappdate4(1:7)		appdate4_1-appdate4_7;
	array 	aappdate5(1:7)		appdate5_1-appdate5_7;
	array 	aappdate6(1:7)		appdate6_1-appdate6_7;
	array 	aappdate7(1:7)		appdate7_1-appdate7_7;

	/* Carry the wide slots across the records of one parent. */
	retain
			kidhun1-kidhun7
			appdate1_1-appdate1_7
			appdate2_1-appdate2_7
			appdate3_1-appdate3_7
			appdate4_1-appdate4_7
			appdate5_1-appdate5_7
			appdate6_1-appdate6_7
			appdate7_1-appdate7_7
			;

	/* New parent: clear every slot so nothing leaks over from the previous
	   parent, who may have had more kids. */
	if first.par_ssn then
		call missing(	of akidhun(*),
						of aappdate1(*), of aappdate2(*), of aappdate3(*),
						of aappdate4(*), of aappdate5(*), of aappdate6(*),
						of aappdate7(*));

	/* File this record's values in the slot for its kid number. */
	akidhun(kid_id) = hun ;
	aappdate1(kid_id) = appdate1;
	aappdate2(kid_id) = appdate2;
	aappdate3(kid_id) = appdate3;
	aappdate4(kid_id) = appdate4;
	aappdate5(kid_id) = appdate5;
	aappdate6(kid_id) = appdate6;
	aappdate7(kid_id) = appdate7;

	/* Write the completed wide record once the last kid has been filed. */
	if last.par_ssn then output;

	keep 	par_ssn
			kidhun1-kidhun7
			appdate1_1-appdate1_7
			appdate2_1-appdate2_7
			appdate3_1-appdate3_7
			appdate4_1-appdate4_7
			appdate5_1-appdate5_7
			appdate6_1-appdate6_7
			appdate7_1-appdate7_7
			;

run;


* MERGE (SAMPLE) SSNs WITH PARENT-KID FILE ;
/* Sort the hun/pan file in place by pan and hun, keeping one record per
   pan/hun pair, so it can be merged by parent SSN in the next step. */
proc sort nodupkey
		data=manasi.parents_comb_hunpan
		out=manasi.parents_comb_hunpan;
	by pan hun;
run;

/* Attach each parent's wide kid record to every hun/pan record of that
   parent. pan is renamed to par_ssn to serve as the merge key, and the
   parent's kidhun1-kidhun7 become sibssn1-sibssn7.
   frompar and fromkids record which input contributed to each output
   record. */
data manasi.kids_comb_wsibapps_wide;
	merge
		manasi.parents_comb_hunpan (
			keep   = pan hun
			rename = (pan=par_ssn)
			in     = in_kids
		)
		manasi.parents_comb_wkidapps_wide (
			rename = (
				kidhun1=sibssn1
				kidhun2=sibssn2
				kidhun3=sibssn3
				kidhun4=sibssn4
				kidhun5=sibssn5
				kidhun6=sibssn6
				kidhun7=sibssn7
			)
			in     = in_par
		);
	by par_ssn;

	/* IN= variables are not written out, so copy them to real variables. */
	frompar = in_par;
	fromkids = in_kids;
run;

/* Cross-tabulate the merge source flags to see how many records matched. */
proc tabulate data=manasi.kids_comb_wsibapps_wide;
	class fromkids frompar;
	table frompar,
		  fromkids;
run;

/* SORT BY (SAMPLE) SSN TO GET ALL OF A KID'S SIBLINGS TOGETHER.
   This sort used to appear twice in a row with identical input, output and
   BY variable. The second pass changed nothing, so only one is kept. */
proc sort data=manasi.kids_comb_wsibapps_wide out=manasi.kids_comb_wsibapps_wide ;
	by hun;
run;

/* RESHAPE LONG SO THAT THERE IS ONE RECORD PER KID-SIBLING PAIR.
   Every input record is written out seven times, once per sibling slot:
   sib is the slot number, sibssn the sibling id in that slot, and
   appdate1-appdate7 the values stored for that slot. Empty slots and
   duplicates are removed in later steps. */
data manasi.kids_comb_wsibapps_long;
	set manasi.kids_comb_wsibapps_wide;

	/* The wide columns are not needed once they have been unpacked. */
	drop 	sibssn1-sibssn7
			appdate1_1-appdate1_7 appdate2_1-appdate2_7
			appdate3_1-appdate3_7 appdate4_1-appdate4_7
			appdate5_1-appdate5_7 appdate6_1-appdate6_7
			appdate7_1-appdate7_7;

	array 	slot_ssn(1:7) $ 9	sibssn1-sibssn7;
	array 	slot_app1(1:7)		appdate1_1-appdate1_7;
	array 	slot_app2(1:7)		appdate2_1-appdate2_7;
	array 	slot_app3(1:7)		appdate3_1-appdate3_7;
	array 	slot_app4(1:7)		appdate4_1-appdate4_7;
	array 	slot_app5(1:7)		appdate5_1-appdate5_7;
	array 	slot_app6(1:7)		appdate6_1-appdate6_7;
	array 	slot_app7(1:7)		appdate7_1-appdate7_7;

	do sib = 1 to 7;
		sibssn   = slot_ssn(sib);
		appdate1 = slot_app1(sib);
		appdate2 = slot_app2(sib);
		appdate3 = slot_app3(sib);
		appdate4 = slot_app4(sib);
		appdate5 = slot_app5(sib);
		appdate6 = slot_app6(sib);
		appdate7 = slot_app7(sib);
		output;
	end;
run;

/* ELIMINATE EMPTY SIBLING SLOTS AND OWN RECORD.
   Keeps only records with a non-blank sibling id that differs from the
   record's own hun, and drops the merge flags. */
data manasi.kids_comb_wsibapps_long;
	set manasi.kids_comb_wsibapps_long;

	drop frompar fromkids;

	if sibssn ne "" and sibssn ne hun;
run;

/* Eliminate duplicate siblings: keep a single record for every
   hun/sibssn pair. */
proc sort nodupkey
		data=manasi.kids_comb_wsibapps_long
		out=manasi.kids_comb_wsibapps_long;
	by hun sibssn;
run;
